In [1]:
%matplotlib inline
import json
import os
import math
import numpy as np
from collections import OrderedDict
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from pathlib import Path
from data import CodesInDbs, Mappings, Databases
from IPython.display import Latex
pd.set_option('display.max_colwidth', 100)
sns.set_style('whitegrid')
sns.set_context("paper", font_scale=2)
#plt.rcParams['figure.figsize'] = (4, 3)
#plt.rc("savefig", dpi=150)
measures_palette = sns.color_palette('Set1', n_colors=2, desat=.5)
measures_palette.reverse()
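# Two-color palette for the Sensitivity/PPV bars; the graded Blues/Reds palettes below
# are used when several code-set variations are compared in one chart.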
def graded_recall_palette(n_colors, rev=True):
palette = sns.color_palette("Blues", n_colors=n_colors, desat=.6)
if rev:
palette.reverse()
return palette
def graded_precision_palette(n_colors, rev=True):
palette = sns.color_palette("Reds", n_colors=n_colors, desat=.6)
if rev:
palette.reverse()
return palette
def mystyle(palette=None, xrot=0, ha='center', ylim=(0,1), ylabel=None, savefig=None):
class C:
def __enter__(self):
if palette is not None:
palette.__enter__()
def __exit__(self, exc_type, value, traceback):
if palette is not None:
palette.__exit__(exc_type, value, traceback)
if exc_type is None:
ax = plt.gca()
sns.despine(left=True, ax=ax)
ax.grid(False, axis='x')
if ax.legend_:
lgd = ax.legend(loc=2, bbox_to_anchor=(1, 1))
else:
lgd = None
if ax.get_lines():
ax.get_lines()[0].set_visible(False)
ax.set_ylim(*ylim)
plt.xticks(rotation=xrot, ha=ha)
if ylabel is not None:
ax.set_ylabel(ylabel)
if savefig:
filename = '{}-{}'.format(PROJECT, savefig)
plt.savefig(filename, bbox_extra_artists=[lgd] if lgd else [], bbox_inches='tight')
return C()
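# mystyle is used below as `with mystyle(palette, savefig='...'): <plot>`: it despines the axes,
# hides the x-grid, moves the legend outside the plot, applies ylim and x-tick rotation, and
# saves the figure as '<PROJECT>-<savefig>' when a filename is given.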
def draw_lines(ys, palette=None):
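# Draw one horizontal reference line per value in ys (typically the 'Average' column),
# placed behind the bars via zorder=-100.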
if palette is None:
palette = sns.color_palette()
ax = plt.gca()
ax.axhline(0, 0, 0) # First axhline is not visible??
for y, color in zip(ys, palette):
ax.axhline(y, color=color, zorder=-100)
return ax
"""
pd.set_option('display.notebook_repr_html', True)
def _repr_latex_(self):
#return r"\begin{center}%s\end{center}" %
return self.to_latex()
pd.DataFrame._repr_latex_ = _repr_latex_ # monkey patch pandas DataFrame
"""
PROJECT = 'safeguard' # os.getenv('COMAP_PROJECT')
print("PROJECT:", PROJECT)
In [2]:
with open('../projects/{}/config.yaml'.format(PROJECT)) as f:
config = yaml.safe_load(f)
databases = Databases.of_config(config)
coding_systems = config['coding-systems']
events = config["events"]
variations = config['variations']
event_names = {}
for event in events:
casedef = yaml.safe_load(open('../projects/{}/case-definitions/{}.yaml'.format(PROJECT, event)))
event_names[event] = casedef['name']
with open('../{}.mappings.json'.format(PROJECT)) as f:
mappings = Mappings.of_data(json.load(f))
with open('../codes-in-dbs.json') as f:
codes_in_dbs = CodesInDbs.of_data(json.load(f))
coding_systems = ["ICD-9", "ICD-10", "ICPC-2", "READ-2"]
def database_label(database):
#return database
#return "{} ({})".format(database, databases.coding_system(database))
return {
"ICD10CM": "ICD-10",
"ICD10/CM": "ICD-10",
"RCD2": "READ-2",
"ICPC2EENG": "ICPC-2",
"ICD9CM": "ICD-9",
}[databases.coding_system(database)]
def measure_label(measure):
return {
"recall": "Sensitivity",
"precision": "PPV", # "Positive predictive value",
}[measure]
def event_label(event):
return event_names[event]
def len_if_notnull(x):
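# NaN is the only value not equal to itself, so missing code lists are counted as 0.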
if x != x:
return 0
else:
return len(x)
In [3]:
ev = pd.read_csv('../{}.evaluations.csv'.format(PROJECT))
for key in ['generated', 'reference', 'tp', 'fp', 'fn']:
ev[key] = ev[key].map(lambda x: x if x != x else json.loads(x))
ev['variation event database recall precision'.split()].head()
Out[3]:
In [4]:
df_m = mappings.describe()
df_m.index = df_m.index.map(database_label)
df_m.columns = df_m.columns.map(event_label)
df_m.index.name = 'Inclusion codes'
df_m['Sum'] = df_m.iloc[:4,:7].sum(axis=1)
df_m['Average'] = df_m.iloc[:4,:7].mean(axis=1).round(2)
#df.ix['Sum'] = df.iloc[:4, :7].sum()
#df.ix['Average'] = df.iloc[:4, :7].mean().round(2)
#df.ix['Sum']['Sum'] = df['Sum'].sum()
#df.ix['Average']['Average'] = df['Average'].mean()
df_m.fillna('-').T[['ICD-9', 'ICD-10', 'ICPC-2', 'READ-2']]
Out[4]:
In [5]:
df_e = mappings.describe(exclusions=True)
df_e.index = df_e.index.map(database_label)
df_e.columns = df_e.columns.map(event_label)
df_e.index.name = 'Exclusion codes'
df_e['Sum'] = df_e.iloc[:4,:7].sum(axis=1)
df_e['Average'] = df_e.iloc[:4,:7].mean(axis=1).round(2)
df_e.fillna('-').T[['ICD-9', 'ICD-10', 'ICPC-2', 'READ-2']]
Out[5]:
In [6]:
def combine_pair(t):
return '{} ({})'.format(t.inc, t.exc)
def combine_row(inc, exc):
return (pd.DataFrame({'inc': inc.fillna('-'), 'exc': exc.fillna('-')})
.apply(combine_pair, axis=1))
df = df_m.astype('float64').combine(df_e.astype('float64'), combine_row)
df = df.T[['ICD-9', 'ICD-10', 'ICPC-2', 'READ-2']]
df.index.name = 'Events'
df
Out[6]:
In [7]:
pd.DataFrame([
(database, databases.coding_system(database), database_label(database))
for database in databases.databases()
], columns=("Database", "Coding system", "Label")).set_index("Database")
Out[7]:
In [8]:
types_distr = pd.DataFrame(json.load(open('../{}.types-distrs.json'.format(PROJECT)))).T
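# types-distrs.json: 'pos'/'neg' counts with a 'group' column; 'DISO' is the UMLS semantic group for Disorders.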
df = pd.DataFrame()
df['All'] = types_distr.groupby('group')[['pos', 'neg']].sum().sum()
df['All %'] = df['All'] / df['All'].sum()
df['DISO'] = types_distr.groupby('group')[['pos', 'neg']].sum().loc['DISO']
df['DISO %'] = df['DISO'] / df['DISO'].sum()
df
Out[8]:
Number of concepts in each mapping
In [9]:
df = (ev[ev.variation == 'baseline0'].
groupby('event').
first().
cuis.
map(json.loads).
map(len).
to_frame('#CUIs'))
df.index = df.index.map(event_label)
df.loc['SUM'] = df['#CUIs'].sum()
df.T
Out[9]:
Number of generated codes, reference codes, and confusion counts (TP, FP, FN) by coding system.
In [10]:
df = ev[ev.variation == 'baseline0'][['event', 'database', 'generated', 'reference', 'tp', 'fp', 'fn']]
for key in ['generated', 'reference', 'tp', 'fp', 'fn']:
df[key] = df[key].map(len_if_notnull)
df['database'] = df['database'].map(database_label)
df.groupby('database').sum()
Out[10]:
In [11]:
df = pd.DataFrame([
ev[ev.variation == 'baseline0'].groupby('database').recall.mean(),
ev[ev.variation == 'baseline0'].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)
with mystyle(measures_palette, savefig='baseline0-performance-by-db.pdf'):
with sns.plotting_context(font_scale=1):
ax = draw_lines(df['Average'])
df.iloc[:,:-1].T.plot(kind='bar', title='Baseline_0', ax=ax)
baseline0_performance = df
df.round(3)
Out[11]:
In [12]:
df = ev[ev.variation == 'baseline'][['event', 'database', 'generated', 'reference', 'tp', 'fp', 'fn']]
for key in 'generated reference tp fp fn'.split():
df[key] = df[key].map(len_if_notnull)
df['database'] = df['database'].map(database_label)
df.groupby('database').sum()
Out[12]:
Number of concepts in the mapping
In [13]:
df = (ev[ev.variation == 'baseline'].
groupby('event').
first().
cuis.
map(json.loads).
map(len).
to_frame('#CUIs'))
df.index = df.index.map(event_label)
df.loc['SUM'] = df['#CUIs'].sum()
df.T
Out[13]:
In [14]:
df = pd.DataFrame([
ev[ev.variation == 'baseline'].groupby('database').recall.mean(),
ev[ev.variation == 'baseline'].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)
with mystyle(measures_palette, savefig='baseline-performance-by-db.pdf'):
with sns.plotting_context(font_scale=1):
ax = draw_lines(df['Average'])
df.iloc[:,:-1].T.plot(kind='bar', title='Baseline', ax=ax)
baseline_performance = df
df.round(3)
Out[14]:
In [15]:
df = pd.DataFrame([
ev[ev.variation == 'baseline'].groupby('event').recall.mean(),
ev[ev.variation == 'baseline'].groupby('event').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(event_label)
df['Average'] = df.mean(axis=1)
with mystyle(measures_palette, xrot=45, ha='right', savefig='baseline-performance-by-event.pdf'):
ax = draw_lines(df['Average'])
df.iloc[:,:-1].T.plot(kind='bar', title='Baseline', ax=ax)
df.round(3)
Out[15]:
In [16]:
df = ev[ev.variation == 'max-recall'][['event', 'database', 'generated', 'reference', 'tp', 'fp', 'fn']]
for key in ['generated', 'reference', 'tp', 'fp', 'fn']:
df[key] = df[key].map(len_if_notnull)
df['database'] = df['database'].map(database_label)
df = df.groupby('database').sum()
df.loc['Overall'] = df.sum()
df['fn/reference'] = df['fn'] / df['reference']
#df['tp/generated'] = 1 - (df.tp / df.generated).round(3)
df.round(3)
Out[16]:
In [17]:
df = pd.DataFrame([
ev[ev.variation == 'max-recall'].groupby('database').recall.mean(),
ev[ev.variation == 'max-recall'].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)
with mystyle(measures_palette, ylim=(0,1), savefig='max-recall-performance-by-db.pdf'):
ax = draw_lines(df['Average'])
df.iloc[:,:-1].T.plot(kind='bar', title='Maximum recall', ax=ax)
maxrecall_performance = df
df.round(3)
Out[17]:
In [18]:
df = pd.DataFrame([
ev[ev.variation == 'max-recall'].groupby('database').recall.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)
with mystyle(measures_palette, ylim=(.9, 1), savefig='max-recall-recall-by-db.pdf'):
ax = draw_lines(df['Average'])
df.iloc[:,:-1].T.plot(kind='bar', legend=False, title='Maximum recall', ax=ax)
plt.ylabel(measure_label('recall'))
df.round(3)
Out[18]:
In [19]:
with open('../{}.code-stats.csv'.format(PROJECT)) as f:
code_stats = pd.read_csv(f)
stats = pd.DataFrame()
stats['Mapping'] = (code_stats[code_stats.InMapping]
.groupby('Database')
.Code.count())
stats['Not maximum-recall'] = (code_stats[code_stats.InMapping & ~code_stats.InDnf]
.groupby('Database')
.Code.count())
stats = stats.fillna(0)
stats['% of missing'] = (stats['Not maximum-recall'] / stats['Not maximum-recall'].sum()).map("{:.2%}".format)
stats['% of mapping'] = (stats['Not maximum-recall'] / stats['Mapping']).map("{:.2%}".format)
stats.index = stats.index.map(database_label)
stats
Out[19]:
In [20]:
max_recall_fn = ev[(ev.variation == 'max-recall') & (ev.recall < 1)][["database", "fn"]]
max_recall_fn.database = max_recall_fn.database.map(database_label)
max_recall_fn = max_recall_fn.groupby('database').fn.sum().to_frame('fn')
max_recall_fn['fn'] = max_recall_fn['fn'].map(lambda x: set() if x != x else set(x)).map(', '.join)
max_recall_fn.index.name = 'Database'
max_recall_fn.columns = ['False negatives of maximum recall']
max_recall_fn
Out[20]:
CPRD: READ2 codes from the reference are mapped to READ CTV3 codes that are not in UMLS, for example 7L1H6 (READ2) -> XaM3E, XaPuP, 7L1H6, 7L1h6.
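To cross-check such cases, one can list, per database, the mapping codes that never reach the maximum-recall code set. A minimal sketch, reusing code_stats as loaded above and assuming the InMapping/InDnf columns shown there:
# Sketch: mapping codes absent from the DNF, i.e. unreachable by maximum recall.
missing = (code_stats[code_stats.InMapping & ~code_stats.InDnf]
           .groupby('Database').Code
           .aggregate(lambda codes: ', '.join(sorted(set(codes)))))
missing.to_frame('Mapping codes not in maximum recall')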
In [21]:
averages_compare = pd.DataFrame([
ev[ev.variation == 'max-recall'].groupby('event').recall.mean(),
ev[ev.variation == 'max-recall'].groupby('event').precision.mean(),
])
averages_compare.index = averages_compare.index.map(measure_label)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(measures_palette, xrot=45, ha='right', savefig='max-recall-by-event.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:,:-1].T.plot(kind='bar', title="Maximum recall", ax=ax)
averages_compare.round(3)
Out[21]:
In [22]:
compare_variations = OrderedDict([
('3-RN-RB.expand', 'Expand 3 RN, RB'),
('3-CHD-PAR.expand', 'Expand 3 PAR, CHD'),
('3-RN-CHD-RB-PAR.expand', 'Expand 3 RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').recall.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_recall_palette(len(compare_variations), rev=0), savefig='relations-recall-by-db.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations in expansion step 3", ax=ax)
plt.ylabel(measure_label('recall'))
print(averages_compare)
In [23]:
compare_variations = OrderedDict([
('3-RN-RB.expand', 'Expand 3 RN, RB'),
('3-CHD-PAR.expand', 'Expand 3 PAR, CHD'),
('3-RN-CHD-RB-PAR.expand', 'Expand 3 RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').precision.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_precision_palette(len(compare_variations), rev=0), savefig='relations-precision-by-db.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations in expansion step 3", ax=ax)
plt.ylabel(measure_label('precision'))
print(averages_compare)
In [24]:
compare_variations = OrderedDict([
('4-RN-RB.expand', 'Expand 4 RN, RB'),
('4-CHD-PAR.expand', 'Expand 4 PAR, CHD'),
('4-RN-CHD-RB-PAR.expand', 'Expand 4 RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').recall.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_recall_palette(len(compare_variations), rev=0), savefig='relations-recall-by-db.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations in expansion step 4", ax=ax)
plt.ylabel(measure_label('recall'))
print(averages_compare)
In [25]:
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('1-RN-RB.expand', 'RN, RB'),
('1-RN-CHD.expand', 'RN, CHD'),
('1-RB-PAR.expand', 'RB, PAR'),
('1-PAR-CHD.expand', 'PAR, CHD'),
('1-RN-CHD-RB-PAR.expand', 'RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').recall.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_recall_palette(len(compare_variations), rev=0), savefig='relations-recall-by-db.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations for expansion", ax=ax)
plt.ylabel(measure_label('recall'))
In [26]:
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('1-RN-RB.expand', 'RN, RB'),
('1-RN-CHD.expand', 'RN, CHD'),
('1-RB-PAR.expand', 'RB, PAR'),
('1-PAR-CHD.expand', 'PAR, CHD'),
('1-RN-CHD-RB-PAR.expand', 'RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('event').recall.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare.index = compare_variations.values()
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_recall_palette(len(compare_variations), rev=0), xrot=45, ha='right', savefig='relations-recall-by-event.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:,:-1].T.plot(kind='bar', title="Relations for expansion", ax=ax)
plt.ylabel(measure_label('recall'))
In [27]:
variations_names = OrderedDict([
('baseline', 'baseline'),
('1-RN-CHD-RB-PAR.expand', 'expand$_1$'),
('2-RN-CHD-RB-PAR.expand', 'expand$_2$'),
('3-RN-CHD-RB-PAR.expand', 'expand$_3$'),
('4-RN-CHD-RB-PAR.expand', 'expand$_4$'),
])
df = pd.DataFrame({
name: ev[ev.variation == variation].groupby('database').recall.mean()
for variation, name in variations_names.items()
}).T
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)
with mystyle(graded_recall_palette(len(variations_names), rev=0), savefig='steps-recall-by-db.pdf'):
ax = draw_lines(df['Average'])
df.iloc[:-1,:-1].T.plot(kind='bar', ax=ax)
plt.ylabel(measure_label('recall'))
df.round(3)
Out[27]:
Exclusion codes are not in the evaluation any more. See note above.
The IPCI mapping contains very broad codes that are refined with additional terms, for example:
- K24 (Fear of heart attack)
- K90 (Stroke)
- K93 (Pulmonary embolism)
- D70 (Dementia) OR "dementia" AND "infarct"
- U14 (Kidney symptom/complaint) OR "nier" AND "infarct"
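As an illustration only (the names and structure are hypothetical, not part of the pipeline), such refinements could be represented as a code-to-required-terms table:
# Hypothetical sketch: broad ICPC codes paired with the free-text terms that must
# co-occur in the record for the code to count towards the event.
ipci_refinements = {
    'D70': ['dementia', 'infarct'],  # Dementia
    'U14': ['nier', 'infarct'],      # Kidney symptom/complaint
}

def code_matches(code, text, refinements=ipci_refinements):
    # True when the code needs no extra terms, or all required terms occur in the text.
    return all(term in text.lower() for term in refinements.get(code, []))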
In [28]:
expands_performance = OrderedDict()
for i in [1,2,3,4]:
v = '{}-RN-CHD-RB-PAR.expand'.format(i)
df = pd.DataFrame([
ev[ev.variation == v].groupby('database').recall.mean(),
ev[ev.variation == v].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)
#with mystyle(measures_palette, ylim=(0,1), savefig='max-recall-performance-by-db.pdf'):
# ax = draw_lines(df['Average'])
# df.iloc[:,:-1].T.plot(kind='bar', title='Maximum recall', ax=ax)
expands_performance['expand_{}'.format(i)] = df
In [29]:
num_concepts = pd.Series(OrderedDict([
(var_name, ev[(ev.variation == var) & (ev.cuis.notnull())]
.groupby('event').first()
.cuis.map(json.loads).map(len)
.sum())
for var_name, var in [('baseline0', 'baseline0'), ('baseline', 'baseline')] + \
[('expand_{}'.format(i), '{}-RN-CHD-RB-PAR.expand'.format(i)) for i in range(1,5)] + \
[(('max-sensitivity', 'max-recall'))]
])).to_frame('Concepts')
num_concepts
Out[29]:
In [30]:
performances = OrderedDict()
performances['baseline_0'] = baseline0_performance
performances['baseline'] = baseline_performance
for v in expands_performance:
performances[v] = expands_performance[v]
performances['max_sensitivity'] = maxrecall_performance
performances_df = pd.concat(performances).round(3)
performances_df
Out[30]:
In [31]:
s = (performances_df
.set_index(performances_df.index.rename(['Variation', 'Measurement']))
.stack()
.reset_index()
.rename(columns={'level_2': 'Terminology', 0: 'Value'})
)
s.head()
Out[31]:
In [170]:
ev1 = (ev[['cuis', 'variation', 'event', 'database', 'recall', 'precision', 'tp', 'fp', 'fn']]
[ev.variation.isin(['baseline']+['{}-RN-CHD-RB-PAR.expand'.format(n) for n in [1,2,3,4]])]
.replace({'variation': {'baseline': '0-baseline'}})
.replace({'variation': {'{}-RN-CHD-RB-PAR.expand'.format(n): "{}-expansion".format(n) for n in [1,2,3,4]}})
.sort_values(by=['variation', 'database', 'event'])
.copy())
for f in ['cuis', 'generated', 'reference', 'tp', 'fp', 'fn']:
# 'cuis' is still a JSON string here (only the confusion-count columns were parsed to lists in cell 3).
ev1[f] = ev[f].map(lambda x: 0 if x != x else len(json.loads(x) if isinstance(x, str) else x))
ev1.head()
Out[170]:
In [164]:
ev2 = ev1.groupby(['variation', 'database']).aggregate(OrderedDict([
('recall', np.mean),
('precision', np.mean),
('cuis', sum),
('generated', sum), ('reference', sum), ('tp', sum), ('fp', sum), ('fn', sum)
]))
ev2.head()
Out[164]:
Verification of macro-average performance measures
In [165]:
(ev1[['variation', 'database', 'event', 'generated' ,'reference', 'tp']]
.assign(precision1=lambda df: df.tp / df.generated)
.assign(recall1=lambda df: df.tp / df.reference)
.groupby(['variation', 'database'])
.aggregate(OrderedDict([('recall1', np.mean), ('precision1', np.mean)]))
.reset_index()
.groupby('variation')
.aggregate(OrderedDict([('recall1', np.mean), ('precision1', np.mean)]))
.round(2))
Out[165]:
Micro-average performance measures
In [166]:
(ev1
.groupby('variation')
.aggregate(OrderedDict([('recall', np.mean), ('precision', np.mean)]))
.round(2))
Out[166]:
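For intuition, a toy example (numbers not taken from the data): macro-averaging averages the per-group measures, while micro-averaging pools the confusion counts before dividing.
tp = pd.Series([8, 1], index=['db_a', 'db_b'])          # toy true-positive counts
reference = pd.Series([10, 2], index=['db_a', 'db_b'])  # toy reference sizes
macro_recall = (tp / reference).mean()     # (0.8 + 0.5) / 2 = 0.65
micro_recall = tp.sum() / reference.sum()  # 9 / 12 = 0.75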
Micro-performance measures
In [230]:
ev3a = (ev1
.groupby(['variation', 'database'])
.aggregate(dict({key: lambda s: s.fillna(0).sum() for key in 'generated reference tp fp fn'.split()}, cuis=np.mean, **{'precision': np.mean, 'recall': np.mean}))
.reset_index())
ev3b = (ev3a
.groupby('variation')
.aggregate(dict({key: lambda s: s.fillna(0).sum() for key in 'generated reference tp fp fn'.split()}, cuis=np.mean))
['cuis generated reference tp fp fn'.split()]
.assign(recall=lambda df: df.tp / df.reference,
precision=lambda df: df.tp / df.generated)
.assign(database='ZZZ')
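# 'ZZZ' is a placeholder database name so the pooled row sorts after the real databases.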
.reset_index())
ev3 = (pd.concat([ev3a, ev3b])
.sort_values(['variation', 'database'])
['variation database cuis generated reference tp fp fn recall precision'.split()]
.set_index(['variation', 'database']))
ev3
Out[230]:
In [167]:
(ev1
.groupby('variation')
.aggregate({key: lambda s: s.fillna(0).sum() for key in 'generated reference tp fp fn'.split()})
.assign(recall=lambda df: df.tp / df.reference)
.assign(precision=lambda df: df.tp / df.generated)
['recall precision generated reference tp fp fn'.split()]
.round(2))
Out[167]:
In [32]:
# Remove [s.Terminology == 'Average'] for all terminologies
variation_names = {
'baseline': 'Baseline',
'baseline_0': None,
'expand_1': '1 expansion step',
'expand_2': '2 expansion steps',
'expand_3': '3 expansion steps',
'expand_4': None,
'max_sensitivity': '(Maximum sensitivity)'
}
s1 = s.copy()
s1['Code sets'] = s1.Variation.map(variation_names)
s1 = s1[s1['Code sets'].notnull()]
g = (sns.factorplot(kind='bar', data=s1[s1.Terminology == 'Average'],
x='Measurement', y='Value', col='Terminology', hue='Code sets',
saturation=1, legend=True, legend_out=True, size=4, aspect=2,
#palette=sns.color_palette("Set2", 7),
palette=sns.color_palette("Set1", n_colors=5, desat=.5),
hue_order=['Baseline', '1 expansion step', '2 expansion steps', '3 expansion steps', '(Maximum sensitivity)'])
.set_titles('')  # Performance (average over events and vocabularies)
.set_xlabels('')
.set_ylabels('')
.set(ylim=(0, 1))
.despine(left=True))
for p in g.axes[0][0].patches:
height = p.get_height()
g.ax.text(p.get_x()+1/12, height-0.025, '%.2f' % height,
fontsize=10, horizontalalignment='center', verticalalignment='top', color='white')
g.savefig('performance.pdf')
In [33]:
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('1-RN-CHD-RB-PAR.expand', 'Expansion 1 step'),
('2-RN-CHD-RB-PAR.expand', 'Expansion 2 steps'),
('3-RN-CHD-RB-PAR.expand', 'Expansion 3 steps'),
('4-RN-CHD-RB-PAR.expand', 'Expansion 4 steps'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').precision.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_precision_palette(len(compare_variations), rev=0), savefig='steps-precision-by-db.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.T.plot(kind='bar', title="Expansion steps", ax=ax)
plt.ylabel(measure_label('precision'))
averages_compare.round(3)
Out[33]:
In [34]:
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('1-RN-CHD-RB-PAR.expand', '1 step'),
('2-RN-CHD-RB-PAR.expand', '2 steps'),
('3-RN-CHD-RB-PAR.expand', '3 steps'),
# ('4-RN-CHD-RB-PAR.expand', '4 steps'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('event').recall.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare.index = compare_variations.values()
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_recall_palette(len(compare_variations), rev=0), xrot=45, ha='right', savefig='steps-recall-by-event.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.T.plot(kind='bar', title="Expansion steps", ax=ax)
plt.ylabel(measure_label('recall'))
In [35]:
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('1-RN-CHD-RB-PAR.expand', '1 step'),
('2-RN-CHD-RB-PAR.expand', '2 steps'),
('3-RN-CHD-RB-PAR.expand', '3 steps'),
# ('4-RN-CHD-RB-PAR.expand', '4 steps'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('event').precision.mean()
for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare.index = compare_variations.values()
averages_compare['Average'] = averages_compare.mean(axis=1)
with mystyle(graded_precision_palette(len(compare_variations), rev=0), xrot=45, ha='right', savefig='steps-precision-by-event.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.T.plot(kind='bar', title="Expansion steps", ax=ax)
plt.ylabel(measure_label('precision'))
In [36]:
measures = ['recall', 'precision']
averages_compare = pd.DataFrame([
ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].groupby('database')[measure].mean()
for measure in measures
], index=map(measure_label, measures))
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)
name = 'expansion3-performance-by-db'
with mystyle(measures_palette, savefig=name+'.pdf'):
ax = draw_lines(averages_compare['Average'])
averages_compare.iloc[:,:-1].T.plot(kind='bar', title="Performance of 3-step expansion", ax=ax)
averages_compare.to_csv(name+'.csv')
In [37]:
variation = '3-RN-CHD-RB-PAR.expand'
with open("../{}.{}.error-analyses.yaml".format(PROJECT, variation)) as f:
error_analyses = yaml.safe_load(f)
def get_category(fn_or_fp, database, event, code):
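# '?': the code is listed for this database/event but has no category; '??': no error analysis exists for this database/event.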
if database in error_analyses[fn_or_fp] and event in error_analyses[fn_or_fp][database]:
return error_analyses[fn_or_fp][database][event]['code-categories'].get(code) or '?'
else:
return '??'
evs = ev[(ev.variation == variation) & ev.fn.notnull()][['event', 'database', 'fn', 'fp']]
fn = evs.apply(lambda row: pd.Series(row.fn), axis=1).stack().reset_index(level=1, drop=True)
fn.name = 'code'
# fns : | event | database | code |
fns = evs.drop(['fn', 'fp'], axis=1).join(fn, how='inner').drop_duplicates()
fns['category'] = fns.apply(lambda r: get_category('fn', r.database, r.event, r.code), axis=1)
fp = evs.apply(lambda row: pd.Series(row.fp), axis=1).stack().reset_index(level=1, drop=True)
fp.name = 'code'
# fps : | event | database | code |
fps = evs.drop(['fn', 'fp'], axis=1).join(fp, how='inner').drop_duplicates()
fps['category'] = fps.apply(lambda r: get_category('fp', r.database, r.event, r.code), axis=1)
fns.groupby(['category', 'database']).code.aggregate(lambda s: set(s)).map(', '.join).to_frame()
Out[37]:
In [38]:
fps.groupby(['category', 'database']).code.aggregate(lambda s: set(s)).map(', '.join).to_frame()
Out[38]:
In [39]:
code_counts = pd.Series({
database: len(set(mappings.all_codes(database)))
for database in databases.databases()
})
code_counts.loc['All'] = code_counts.sum()
code_counts.index.name = 'database'
def category_label(category):
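# Human-readable labels for the FN/FP categories assigned in the error-analysis YAML.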
return {
# FN
'not-in-dnf': 'Not in UMLS',
'database-specific': 'DB specific',
'next-expansion': 'expansion_{4}',
'isolated': 'Isolated',
# FP
'in-dnf': 'Cosynonym',
'other-fp': 'Indexing FP',
}.get(category, category)
def counts(code_categories, FN_or_FP):
"code_categories : | code | category |"
# (database, category) | int
s1 = code_categories.groupby('database').category.value_counts()
# category | int
s2 = code_categories.category.value_counts()
s2.index = pd.MultiIndex.from_product([['Overall'], s2.index])
res = pd.concat([s1, s2]).to_frame('count')
res['ratio'] = res['count'] / s2.sum()
res['%'] = res['ratio'].map('{:.1%}'.format)
#res['% (mapping)'] = (res['count'] / code_counts).map('{:.1%}'.format)
res = res.rename(columns={'count': '{} category'.format(FN_or_FP)}).reset_index()
res['category'] = res['category'].map(category_label)
res['database'] = res['database'].map(lambda db: db if db == 'Overall' else database_label(db))
res['error-type'] = [FN_or_FP] * len(res)
return res
fp_counts = counts(fps, 'FP')
fp_counts
Out[39]:
In [40]:
fn_counts = counts(fns, 'FN')
fn_counts
Out[40]:
In [41]:
category_names = {
'DB specific': 'No synonym in reference',
'Indexing FP': 'No TP synonym',
'Cosynonym': 'Sibling of TP code'
}
data = pd.concat([
(fn_counts[fn_counts.database == 'Overall']
.rename(columns={'FN category': 'Count'})
.assign(Category=lambda df: df.category.map(category_names))),
(fp_counts[fp_counts.database == 'Overall']
.rename(columns={'FP category': 'Count'})
.assign(Category=lambda df: df.category.map(category_names)))
])
print(data)
(sns.factorplot(kind='bar', data=data[data['error-type'] == 'FP'], x='category', y='ratio',
legend=True, legend_out=True, size=4, ci=None))
Out[41]:
In [42]:
(sns.factorplot(kind='bar', data=data[data['error-type'] == 'FN'], x='category', y='ratio',
legend=True, legend_out=True, size=4, ci=None))
Out[42]:
measures = OrderedDict([
('recall', measure_label('recall')),
('recall_in_umls', '{} to reference in UMLS'.format(measure_label('recall'))),
('recall_without_exclusions', '{} over inclusion codes'.format(measure_label('recall'))),
('recall_without_exclusions_in_umls', '{} over inclusion codes in UMLS'.format(measure_label('recall'))),
('', ''),
('precision', measure_label('precision')),
('precision_over_dnf', '{} over maximum recall'.format(measure_label('precision'))),
])
averages_compare = pd.DataFrame([
ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].groupby('database')[measure].mean()\
if measure else\
pd.Series([0] * len(ev.database.unique()), index=ev.database.unique())
for measure in measures
], index=measures.values())
averages_compare.columns = averages_compare.columns.map(database_label)
p = sns.color_palette(graded_recall_palette(5)[:-1] + [(1,1,1)] + graded_precision_palette(3)[:-1])
with mystyle(p, savefig='expansion3-error-analysis-by-db.pdf'):
averages_compare.T.plot(kind='bar', title="Error analysis of 3-step expansion")
averages_compare = pd.DataFrame([
ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].groupby('event')[measure].mean()\
if measure else\
pd.Series([0] * len(ev.event.unique()), index=ev.event.unique())
for measure in measures
], index=measures.values())
averages_compare.columns = averages_compare.columns.map(event_label)
p = sns.color_palette(graded_recall_palette(5)[:-1] + [(1,1,1)] + graded_precision_palette(3)[:-1])
with mystyle(p, savefig='expansion3-error-analysis-by-event.pdf', xrot=45, ha='right'):
averages_compare.T.plot(kind='bar', title="Error analysis of 3-step expansion")
residuals = ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].copy()
residuals.fn_inclusions_in_umls = residuals.fn_inclusions_in_umls\
.fillna('NaN').map(json.loads)
def get_missed(row):
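# 'missed' is rendered as '<residual false negatives>/<reference inclusion codes in UMLS>'.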
if math.isnan(row.recall_without_exclusions_in_umls):
return ''
else:
reference = set(json.loads(row.reference_inclusions_in_umls))
return "{}/{}".format(len(row.fn_inclusions_in_umls), len(reference))
residuals['missed'] = residuals.apply(get_missed, axis=1)
residuals.fn_inclusions_in_umls = residuals.fn_inclusions_in_umls\
.map(lambda s: ', '.join(s) if type(s) == list else 'N/A')
residuals.database = residuals.database.map(database_label)
residuals.event = residuals.event.map(event_label)
residuals.recall_without_exclusions_in_umls = residuals.recall_without_exclusions_in_umls\
.map('{:.2f}'.format)
residuals = residuals.sort_values(by=['database', 'event']).reset_index(drop=True)
residuals = residuals[['database', 'event', 'recall_without_exclusions_in_umls', 'missed', 'fn_inclusions_in_umls']]
residuals.columns = ["Database", "Event", "Recall", "Missed", "Residual FNs"]
#residuals = residuals.set_index(['Database', 'Event'])["Residual FNs"].unstack()
residuals
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('baseline.filter-gen', 'Filtered'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').precision.mean()
for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(database_label)
with mystyle(graded_precision_palette(len(compare_variations)), savefig='filtered-baseline-precision-by-db.pdf'):
averages_compare.T.plot(kind='bar', title="Filtered baseline")
plt.ylabel(measure_label('precision'))
compare_variations = OrderedDict([
('baseline', 'Baseline'),
('baseline.filter-gen', 'Filtered'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('event').precision.mean()
for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(event_label)
with mystyle(graded_precision_palette(len(compare_variations)), xrot=45, ha='right', savefig='filtered-baseline-precision-by-event.pdf'):
averages_compare.T.plot(kind='bar', title="Filtered baseline")
plt.ylabel(measure_label('precision'))
compare_variations = OrderedDict([
('3-RN-CHD-RB-PAR.expand', 'Expand 3 steps'),
('3-RN-CHD-RB-PAR.expand.filter-gen', 'Expand 3 steps, filter'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('database').precision.mean()
for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(database_label)
with mystyle(graded_precision_palette(len(compare_variations)), savefig='filtered-expansion3-precision-by-db.pdf'):
averages_compare.T.plot(kind='bar', title="Filtered expansion")
plt.ylabel(measure_label('precision'))
compare_variations = OrderedDict([
('3-RN-CHD-RB-PAR.expand', 'Expand 3 steps'),
('3-RN-CHD-RB-PAR.expand.filter-gen', 'Expand 3 steps, filter'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('event').recall.mean()
for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(event_names.get)
with mystyle(graded_recall_palette(len(compare_variations)), xrot=45, ha='right', savefig='filtered-expansion3-recall-by-event.pdf'):
averages_compare.T.plot(kind='bar', title="Filtered 3-step expansion")
plt.ylabel(measure_label('recall'))
compare_variations = OrderedDict([
('3-RN-CHD-RB-PAR.expand', 'Expand 3 steps'),
('3-RN-CHD-RB-PAR.expand.filter-gen', 'Expand 3 steps, filter'),
])
averages_compare = pd.DataFrame([
ev[ev.variation == variation].groupby('event').precision.mean()
for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(event_names.get)
with mystyle(graded_precision_palette(len(compare_variations)), xrot=45, ha='right', savefig='filtered-expansion3-precision-by-event.pdf'):
averages_compare.T.plot(kind='bar', title="Filtered 3-step expansion")
plt.ylabel(measure_label('precision'))
measures = ['recall', 'precision']
averages_compare = pd.DataFrame([
ev[ev.variation == '3-RN-CHD-RB-PAR.expand.filter-gen'].groupby('database')[measure].mean()
for measure in measures
], index=map(measure_label, measures))
averages_compare.columns = averages_compare.columns.map(database_label)
#averages_compare.index = compare_variations.values()
with mystyle(measures_palette, savefig='filtered-expansion3-performance-by-db.pdf'):
averages_compare.T.plot(kind='bar', title="Performance of filtered 3-step expansion")
The drop in PPV for Myocardial infarction is caused by the mapping to codes 410.*
(Acute myocardial infarction) in Medicare, which are not used in the ARS database.
stats = pd.DataFrame()
stats['In ref'] = code_stats[code_stats.InMapping]\
.groupby('Database').Code.count()
stats['Not in DB'] = code_stats[code_stats.InMapping & ~code_stats.InDatabase]\
.groupby('Database').Code.count()
stats.fillna(0, inplace=True)
stats['%'] = (stats['Not in DB'] / stats['In ref']).map("{:.2%}".format)
stats['Codes'] = code_stats[code_stats.InMapping & ~code_stats.InDatabase]\
.groupby('Database').Code.aggregate(lambda vs: ', '.join(set(vs)))
stats